World University Rankings

Open necessary datasets

cwurData <- read.csv("cwurData.csv")
educationExpenditure <- read.csv("education_expenditure_supplementary_data.csv")
educationalAttainment <- read.csv("educational_attainment_supplementary_data.csv")
schoolCountry <- read.csv("school_and_country_table.csv")
shanghaiData <- read.csv("shanghaiData.csv")
timesData <- read.csv("timesData.csv")
knitr::kable(head(cwurData,10), caption = "Central World University Rankings information (first 10 rows)")
Central World University Rankings information (first 10 rows)
world_rank institution country national_rank quality_of_education alumni_employment quality_of_faculty publications influence citations broad_impact patents score year
1 Harvard University USA 1 7 9 1 1 1 1 NA 5 100.00 2012
2 Massachusetts Institute of Technology USA 2 9 17 3 12 4 4 NA 1 91.67 2012
3 Stanford University USA 3 17 11 5 4 2 2 NA 15 89.50 2012
4 University of Cambridge United Kingdom 1 10 24 4 16 16 11 NA 50 86.17 2012
5 California Institute of Technology USA 4 2 29 7 37 22 22 NA 18 85.21 2012
6 Princeton University USA 5 8 14 2 53 33 26 NA 101 82.50 2012
7 University of Oxford United Kingdom 2 13 28 9 15 13 19 NA 26 82.34 2012
8 Yale University USA 6 14 31 12 14 6 15 NA 66 79.14 2012
9 Columbia University USA 7 23 21 10 13 12 14 NA 5 78.86 2012
10 University of California, Berkeley USA 8 16 52 6 6 5 3 NA 16 78.55 2012
knitr::kable(head(shanghaiData,10),caption="Shanghai Ranking information (first 10 rows)")
Shanghai Ranking information (first 10 rows)
world_rank university_name national_rank total_score alumni award hici ns pub pcp year
1 Harvard University 1 100.0 100.0 100.0 100.0 100.0 100.0 72.4 2005
2 University of Cambridge 1 73.6 99.8 93.4 53.3 56.6 70.9 66.9 2005
3 Stanford University 2 73.4 41.1 72.2 88.5 70.9 72.3 65.0 2005
4 University of California, Berkeley 3 72.8 71.8 76.0 69.4 73.9 72.2 52.7 2005
5 Massachusetts Institute of Technology (MIT) 4 70.1 74.0 80.6 66.7 65.8 64.3 53.0 2005
6 California Institute of Technology 5 67.1 59.2 68.6 59.8 65.8 52.5 100.0 2005
7 Columbia University 6 62.3 79.4 60.6 56.1 54.2 69.5 45.4 2005
8 Princeton University 7 60.9 63.4 76.8 60.9 48.7 48.5 59.1 2005
9 University of Chicago 8 60.1 75.6 81.9 50.3 44.7 56.4 42.2 2005
10 University of Oxford 2 59.7 64.3 59.1 48.4 55.6 68.4 53.2 2005
knitr::kable(head(educationalAttainment,10),caption="Education attainment information (first 10 rows)")
Education attainment information (first 10 rows)
country_name series_name X1985 X1986 X1987 X1990 X1991 X1992 X1993 X1995 X1996 X1997 X1998 X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009 X2010 X2011 X2012 X2013 X2015
Afghanistan Barro-Lee: Average years of primary schooling, age 15+, female 0.33 NA NA 0.44 NA NA NA 0.57 NA NA NA NA 0.75 NA NA NA NA 0.86 NA NA NA NA 1.27 NA NA NA NA
Afghanistan Barro-Lee: Average years of primary schooling, age 15+, total 1.03 NA NA 1.26 NA NA NA 1.54 NA NA NA NA 2.01 NA NA NA NA 2.18 NA NA NA NA 2.64 NA NA NA NA
Afghanistan Barro-Lee: Average years of primary schooling, age 15-19, female 0.83 NA NA 0.95 NA NA NA 1.26 NA NA NA NA 1.92 NA NA NA NA 1.01 NA NA NA NA 2.45 NA NA NA NA
Afghanistan Barro-Lee: Average years of primary schooling, age 15-19, total 2.34 NA NA 2.22 NA NA NA 2.37 NA NA NA NA 3.83 NA NA NA NA 2.26 NA NA NA NA 3.55 NA NA NA NA
Afghanistan Barro-Lee: Average years of primary schooling, age 20-24, female 0.54 NA NA 0.92 NA NA NA 0.94 NA NA NA NA 1.26 NA NA NA NA 2.00 NA NA NA NA 1.29 NA NA NA NA
Afghanistan Barro-Lee: Average years of primary schooling, age 20-24, total 1.52 NA NA 2.51 NA NA NA 2.27 NA NA NA NA 2.48 NA NA NA NA 3.93 NA NA NA NA 2.64 NA NA NA NA
Afghanistan Barro-Lee: Average years of primary schooling, age 25+, female 0.17 NA NA 0.25 NA NA NA 0.37 NA NA NA NA 0.48 NA NA NA NA 0.63 NA NA NA NA 0.81 NA NA NA NA
Afghanistan Barro-Lee: Average years of primary schooling, age 25+, total 0.66 NA NA 0.85 NA NA NA 1.14 NA NA NA NA 1.38 NA NA NA NA 1.69 NA NA NA NA 2.19 NA NA NA NA
Afghanistan Barro-Lee: Average years of primary schooling, age 25-29, female 0.44 NA NA 0.54 NA NA NA 0.92 NA NA NA NA 0.94 NA NA NA NA 1.26 NA NA NA NA 1.92 NA NA NA NA
Afghanistan Barro-Lee: Average years of primary schooling, age 25-29, total 1.28 NA NA 1.52 NA NA NA 2.51 NA NA NA NA 2.27 NA NA NA NA 2.48 NA NA NA NA 3.93 NA NA NA NA
knitr::kable(head(educationExpenditure,10),caption="Education expenditure information (first 10 rows)")
Education expenditure information (first 10 rows)
country institute_type direct_expenditure_type X1995 X2000 X2005 X2009 X2010 X2011
OECD Average All Institutions Public 4.9 4.9 5.0 5.4 5.4 5.3
Australia All Institutions Public 4.5 4.6 4.3 4.5 4.6 4.3
Austria All Institutions Public 5.3 5.4 5.2 5.7 5.6 5.5
Belgium All Institutions Public 5.0 5.1 5.8 6.4 6.4 6.4
Canada All Institutions Public 5.8 5.2 4.8 5.0 5.2 NA
Chile All Institutions Public NA 4.2 3.3 4.1 4.3 3.9
Czech Republic All Institutions Public 4.8 4.2 4.1 4.2 4.1 4.4
Denmark All Institutions Public 6.5 6.4 6.8 7.5 7.6 7.5
Estonia All Institutions Public NA NA 4.7 5.9 5.6 5.2
Finland All Institutions Public 6.6 5.5 5.9 6.3 6.4 6.3
knitr::kable(head(timesData,10),caption="Times Higher Education World University Rankings data information (first 10 rows)")
Times Higher Education World University Rankings data information (first 10 rows)
world_rank university_name country teaching international research citations income total_score num_students student_staff_ratio international_students female_male_ratio year
1 Harvard University United States of America 99.7 72.4 98.7 98.8 34.5 96.1 20,152 8.9 25% 2011
2 California Institute of Technology United States of America 97.7 54.6 98.0 99.9 83.7 96.0 2,243 6.9 27% 33 : 67 2011
3 Massachusetts Institute of Technology United States of America 97.8 82.3 91.4 99.9 87.5 95.6 11,074 9.0 33% 37 : 63 2011
4 Stanford University United States of America 98.3 29.5 98.1 99.2 64.3 94.3 15,596 7.8 22% 42 : 58 2011
5 Princeton University United States of America 90.9 70.3 95.4 99.9 - 94.2 7,929 8.4 27% 45 : 55 2011
6 University of Cambridge United Kingdom 90.5 77.7 94.1 94.0 57.0 91.2 18,812 11.8 34% 46 : 54 2011
6 University of Oxford United Kingdom 88.2 77.2 93.9 95.1 73.5 91.2 19,919 11.6 34% 46 : 54 2011
8 University of California, Berkeley United States of America 84.2 39.6 99.3 97.8 - 91.1 36,186 16.4 15% 50 : 50 2011
9 Imperial College London United Kingdom 89.2 90.0 94.5 88.3 92.9 90.6 15,060 11.7 51% 37 : 63 2011
10 Yale University United States of America 92.1 59.2 89.7 91.5 - 89.5 11,751 4.4 20% 50 : 50 2011
knitr::kable(head(schoolCountry,10),caption="School & country information (first 10 rows)")
School & country information (first 10 rows)
school_name country
Harvard University United States of America
California Institute of Technology United States of America
Massachusetts Institute of Technology United States of America
Stanford University United States of America
Princeton University United States of America
University of Cambridge United Kingdom
University of Oxford United Kingdom
University of California, Berkeley United States of America
Imperial College London United Kingdom
Yale University United States of America
glimpse(cwurData)
## Rows: 2,200
## Columns: 14
## $ world_rank           <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15~
## $ institution          <chr> "Harvard University", "Massachusetts Institute of~
## $ country              <chr> "USA", "USA", "USA", "United Kingdom", "USA", "US~
## $ national_rank        <int> 1, 2, 3, 1, 4, 5, 2, 6, 7, 8, 9, 10, 11, 1, 12, 1~
## $ quality_of_education <int> 7, 9, 17, 10, 2, 8, 13, 14, 23, 16, 15, 21, 31, 3~
## $ alumni_employment    <int> 9, 17, 11, 24, 29, 14, 28, 31, 21, 52, 26, 42, 16~
## $ quality_of_faculty   <int> 1, 3, 5, 4, 7, 2, 9, 12, 10, 6, 8, 14, 24, 31, 20~
## $ publications         <int> 1, 12, 4, 16, 37, 53, 15, 14, 13, 6, 34, 22, 9, 8~
## $ influence            <int> 1, 4, 2, 16, 22, 33, 13, 6, 12, 5, 20, 21, 10, 19~
## $ citations            <int> 1, 4, 2, 11, 22, 26, 19, 15, 14, 3, 28, 16, 8, 23~
## $ broad_impact         <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ patents              <int> 5, 1, 15, 50, 18, 101, 26, 66, 5, 16, 101, 10, 9,~
## $ score                <dbl> 100.00, 91.67, 89.50, 86.17, 85.21, 82.50, 82.34,~
## $ year                 <int> 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2~

Review and reveal interesting facts

cwurData %>% group_by(year) %>% 
  select(year,institution,world_rank) %>% top_n(-5, wt = world_rank) -> cwurTop5

plot_ly(cwurTop5, x = ~year) %>%
  add_trace(y = cwurTop5$world_rank, name = cwurTop5$institution, showlegend=TRUE, type = 'scatter', mode = 'lines+markers', color= cwurTop5$institution) %>%
  layout(title="World Ranked Universities by CWUR (2012-2015)",
         xaxis = list(showticklabels = TRUE, tickangle = 0, tickfont = list(size = 8)),
         yaxis = list(title = "World rank"),
         hovermode = 'compare')
cwurPlotYear <- function(nYear) {
  cwurData %>% filter(year==nYear) %>% top_n(10,-world_rank) %>% 
  ggplot(aes(x=reorder(institution,-world_rank), y=world_rank)) + geom_bar(stat="identity", aes(fill=reorder(institution,-world_rank)), colour="black") +
    theme_bw() + coord_flip() +  scale_fill_manual(values=c(rep("lightgreen",7), "#CD7F32", "grey", "gold")) + guides(fill=FALSE) +
    labs(x="Institution", y="World Rank", 
        title=paste("Rank in ",nYear), subtitle="(smaller value is better)")
}
cwurPlotYear(2012) -> d1
cwurPlotYear(2013) -> d2
cwurPlotYear(2014) -> d3
cwurPlotYear(2015) -> d4
grid.arrange(d1,d2,d3,d4, ncol=2)

cwurData %>% group_by(country) %>% summarise(n = length(publications)) %>% top_n(10,n) %>% ungroup() -> c
cwurData %>% filter(country %in% c$country) %>%
ggplot(aes(x=country, y=publications, col=country)) + guides(col=FALSE) +
  geom_boxplot() +  theme_bw() + coord_flip() + 
  labs(x="Country", y="Rank by publication", 
      title="Rank by publication", subtitle="Grouped by country, smaller value is better") + theme(text = element_text(size = 20)) -> d1
cwurData %>% filter(country %in% c$country) %>%
ggplot(aes(x=country, y=citations, col=country)) + guides(col=FALSE) +
  geom_boxplot() +  theme_bw() + coord_flip() + 
  labs(x="Country", y="Rank by citations", 
      title="Rank by citations", subtitle="Grouped by country, smaller value is better") + theme(text = element_text(size = 20)) -> d2
cwurData %>% filter(country %in% c$country) %>%
ggplot(aes(x=country, y=patents, col=country)) + guides(col=FALSE) +
  geom_boxplot() +  theme_bw() + coord_flip() + 
  labs(x="Country", y="Rank by patents", 
      title="Rank by patents", subtitle="Grouped by country, smaller value is better") + theme(text = element_text(size = 20)) -> d3
cwurData %>% filter(country %in% c$country) %>%
ggplot(aes(x=country, y=quality_of_education, col=country)) + guides(col=FALSE) +
  geom_boxplot() +  theme_bw() + coord_flip() + 
  labs(x="Country", y="Rank by quality of education", 
      title="Rank by quality of education", subtitle="Grouped by country, smaller value is better") + theme(text = element_text(size = 20)) -> d4
cwurData %>% filter(country %in% c$country) %>%
ggplot(aes(x=country, y=alumni_employment, col=country)) + guides(col=FALSE) +
  geom_boxplot() +  theme_bw() + coord_flip() + 
  labs(x="Country", y="Rank by alumni employment", 
      title="Rank by alumni employment", subtitle="Grouped by country, smaller value is better") + theme(text = element_text(size = 20)) -> d5
cwurData %>% filter(country %in% c$country) %>%
ggplot(aes(x=country, y=quality_of_faculty, col=country)) + guides(col=FALSE) +
  geom_boxplot() +  theme_bw() + coord_flip() + 
  labs(x="Country", y="Rank by quality of faculty", 
      title="Rank by quality of faculty", subtitle="Grouped by country, smaller value is better") + theme(text = element_text(size = 20)) -> d6
grid.arrange(d1,d2,d3,d4,d5,d6, ncol=2)

cwurData %>% group_by(country,year) %>% 
  summarise(nr = length(world_rank), minw=min(world_rank), maxw=max(world_rank), avgw=round(mean(world_rank),0)) %>%
  select(country, year, nr, minw, maxw, avgw) %>% ungroup() -> ccwur
# light grey boundaries
l <- list(color = toRGB("grey"), width = 0.5)
ccwur$hover <- with(ccwur, 
        paste("Country: ", country, '<br>', 
              "Year: ",year, "<br>",
              "Universities in top: ", nr, "<br>",
              "Min rank in top: ", minw, "<br>",
              "Max rank in top: ", maxw, "<br>",
              "Mean rank in top: ", avgw,"<br>"
              ))
g <- list(
  showframe = TRUE,
  showcoastlines = TRUE,
  projection = list(type = 'orthogonal')
)
plot_geo(ccwur, locationmode = 'country names') %>%
  add_trace(
    z = ~nr, color = ~nr, colors = 'Spectral', frame = ~year,
    text = ~hover, locations=~country, marker = list(line = l)
  ) %>%
  colorbar(title = 'Number of\nuniversities in top', tickprefix = '') %>%
  layout(
    title = with(ccwur, paste('Number of universities in top<br>Source:<a href="http://cwur.org/">Council of World University Ranking</a>')),
    geo = g
  )

Check Shanghai Data

shanghaiDataCld = shanghaiData
shanghaiDataCld$t_score = 
  0.1 * shanghaiDataCld$alumni + 0.2 * shanghaiDataCld$award + 0.2 * shanghaiDataCld$hici + 
  0.2 * shanghaiDataCld$ns + 0.2 * shanghaiDataCld$pub + 0.1 * shanghaiDataCld$pcp
shanghaiDataCld$total_score[is.na(shanghaiDataCld$total_score)] = shanghaiDataCld$t_score[is.na(shanghaiDataCld$total_score)]
shanghaiDataCld = shanghaiDataCld[complete.cases(shanghaiDataCld),]
#Fix the duplicate name for University of California-Berkeley
shanghaiDataCld$university_name[shanghaiDataCld$university_name=="University of California-Berkeley"] <- "University of California, Berkeley"
shanghaiDataCld %>% group_by(year) %>% 
  top_n(10, wt = total_score) %>% select(year,university_name,total_score,alumni, award, hici, ns, pub, pcp) %>% ungroup() -> top10univ
 
 #draw with plotly
 
plot_ly(top10univ, x = ~year) %>%
  add_trace(y = top10univ$total_score, name = top10univ$university_name, showlegend=TRUE, type = 'scatter', mode = 'lines+markers', color= top10univ$university_name) %>%
  layout(title="Shanghai (ARWU) World Ranks (2005-2015)<br>Best ranked universities based on total score", legend = list(orientation = 'h'),
         xaxis = list(showticklabels = TRUE, tickangle = 0, tickfont = list(size = 8)),
         yaxis = list(title = "Total score"),
         hovermode = 'compare')

Shanghai Top Universities by Year

top10SpiderWebYear <- function(nYear) {
    top10univ %>% filter(year==nYear) %>% ungroup() -> top10u
    top10 <- as.data.frame(cbind(top10u[,c(3,4,5,6,7,8,9)]))
    colnames(top10) <- c("Total Score", "Alumni with Nobel", "Awarded Nobel", "Highly Cited", 
                         "Nature&Science", "Publications", "PCAP")
    rownames(top10) <- top10u$university_name
    rmin <- apply(top10,2,min); rmax <- apply(top10,2,max)
     rmax <- 100
     rmin <- 0
    colors_border=c( "tomato", "blue", "gold", "green", "magenta", 
                 "yellow", "grey", "lightblue", "brown", "red", "lightgreen", "cyan" )
    par(mfrow=c(4,3))
    par(mar=c(1,1,5,1))
    for(i in 1:nrow(top10)){
      colorValue<-(col2rgb(as.character(colors_border[i]))%>% as.integer())/255
      radarchart(rbind(rmax,rmin,top10[i,]),
         axistype=2 , 
         pcol=rgb(colorValue[1],colorValue[2],colorValue[3], alpha = 1),
         pfcol=rgb(colorValue[1],colorValue[2],colorValue[3], alpha = 0.5),
         plwd=1 , plty=1,cglcol="grey", cglty=1, axislabcol="grey", cglwd=0.5,vlcex=0.7, 
         title=rownames(top10[i,]))
    }
    title(paste0('\nShanghai World University  Rankings top 10 (',nYear,')'),outer=TRUE,col.main='black',cex.main=1.5)
}
top10SpiderWebYear(2005)
top10SpiderWebYear(2006)

top10SpiderWebYear(2007)

top10SpiderWebYear(2008)

top10SpiderWebYear(2009)

top10SpiderWebYear(2010)

top10SpiderWebYear(2011)

top10SpiderWebYear(2012)

top10SpiderWebYear(2013)

top10SpiderWebYear(2014)

top10SpiderWebYear(2015)